Load Libraries

# Load required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(naniar)
## 
## Attaching package: 'naniar'
## 
## The following object is masked from 'package:skimr':
## 
##     n_complete
library(ggplot2)
library(corrplot)
## corrplot 0.95 loaded
library(broom)
library(countrycode)

Data Wrangling Functions

load_energy_data <- function(url = "https://raw.githubusercontent.com/owid/energy-data/master/owid-energy-data.csv") {
  read_csv(url)
}

filter_energy_data <- function(df) {
  df %>%
    filter(!str_starts(iso_code, "OWID"), !is.na(country), !is.na(year)) %>%
    filter(year >= 2000) %>%
    select(where(~ sum(is.na(.)) < 0.4 * nrow(df))) %>%
    filter(!is.na(population), !is.na(gdp))
}

select_energy_columns <- function(df) {
  df %>%
    select(
      country, iso_code, year, population, gdp,
      electricity_generation,
      renewables_electricity, fossil_electricity,
      solar_electricity, wind_electricity, hydro_electricity,
      renewables_share_elec, coal_share_elec, gas_share_elec, oil_share_elec
    )
}

add_normalized_metrics <- function(df) {
  df %>%
    mutate(
      electricity_per_capita = electricity_generation / population,
      renewables_per_capita = renewables_electricity / population,
      fossil_per_capita = fossil_electricity / population,
      electricity_per_gdp = electricity_generation / gdp,
      gdp_per_electricity = gdp / (electricity_generation + 1)
    )
}

add_log_transforms <- function(df) {
  df %>%
    mutate(
      log_gdp = log(gdp + 1),
      log_population = log(population + 1),
      log_electricity = log(electricity_generation + 1)
    )
}

na_percentage <- function(df) {
  sapply(df, function(col) round(mean(is.na(col)) * 100, 2))
}

add_energy_ratios <- function(df) {
  df %>%
    mutate(
      fossil_to_renewable_ratio = fossil_electricity / (renewables_electricity + 1),
      fossil_share_elec = fossil_electricity / (electricity_generation + 1),
      solar_share = solar_electricity / (renewables_electricity + 1),
      wind_share = wind_electricity / (renewables_electricity + 1),
      hydro_share = hydro_electricity / (renewables_electricity + 1)
    )
}

add_classification_flags <- function(df) {
  df %>%
    mutate(
      high_renewable = if_else(renewables_share_elec > 50, 1, 0),
      transitioning = if_else(renewables_share_elec > fossil_share_elec, 1, 0)
    )
}

transform_energy_data <- function(df) {
  df %>%
    add_normalized_metrics() %>%
    add_log_transforms() %>%
    add_energy_ratios() %>%
    add_classification_flags()
}

Load Dataset 2 - Electricity Generation Data

# Load and process
energy_raw <- load_energy_data()
## Rows: 21812 Columns: 130
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (2): country, iso_code
## dbl (128): year, population, gdp, biofuel_cons_change_pct, biofuel_cons_chan...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cat("Number of rows in Raw Dataset :",  nrow(energy_raw),'\n')
## Number of rows in Raw Dataset : 21812
cat("Number of columns in Raw Dataset :", ncol(energy_raw),'\n')
## Number of columns in Raw Dataset : 130
energy_clean <- filter_energy_data(energy_raw)
cat("Number of rows in Filtered Dataset :",  nrow(energy_clean),'\n')
## Number of rows in Filtered Dataset : 3795
cat("Number of columns in Filtered Dataset :", ncol(energy_clean),'\n')
## Number of columns in Filtered Dataset : 130
energy_selected <- select_energy_columns(energy_clean)

cat("Number of columns in Dataset after dropping unnecessary columns:", ncol(energy_selected),'\n')
## Number of columns in Dataset after dropping unnecessary columns: 15
energy_transformed <- transform_energy_data(energy_selected)

# Add continent info
energy_transformed <- energy_transformed %>%
  mutate(continent = countrycode(country, "country.name", "continent"))

cat("Number of columns in Dataset after creating new columns and transformations:", ncol(energy_transformed),'\n')
## Number of columns in Dataset after creating new columns and transformations: 31

Check for Duplicates and Missing Values

### Checking for duplicated and % of missing values
energy_transformed %>% filter(duplicated(.))
### Checking for missing values
print("Percentage of Missing Values left :")
## [1] "Percentage of Missing Values left :"
na_percentage(energy_transformed)
##                   country                  iso_code                      year 
##                      0.00                      0.00                      0.00 
##                population                       gdp    electricity_generation 
##                      0.00                      0.00                      0.00 
##    renewables_electricity        fossil_electricity         solar_electricity 
##                      0.00                      0.00                      0.13 
##          wind_electricity         hydro_electricity     renewables_share_elec 
##                      0.00                      1.92                      0.13 
##           coal_share_elec            gas_share_elec            oil_share_elec 
##                      0.13                      0.71                      0.13 
##    electricity_per_capita     renewables_per_capita         fossil_per_capita 
##                      0.00                      0.00                      0.00 
##       electricity_per_gdp       gdp_per_electricity                   log_gdp 
##                      0.00                      0.00                      0.00 
##            log_population           log_electricity fossil_to_renewable_ratio 
##                      0.00                      0.00                      0.00 
##         fossil_share_elec               solar_share                wind_share 
##                      0.00                      0.13                      0.00 
##               hydro_share            high_renewable             transitioning 
##                      1.92                      0.13                      0.13 
##                 continent 
##                      0.00

Some latest stats about countries about renewable electricity

# Filter for latest year
latest_year <- max(energy_transformed$year, na.rm = TRUE)
latest_data <- energy_transformed %>% filter(year == latest_year)

# Get top countries for different metrics
top_renewables <- latest_data %>% arrange(desc(renewables_electricity)) %>% slice(1)
top_generation <- latest_data %>% arrange(desc(electricity_generation)) %>% slice(1)
top_per_capita <- latest_data %>% arrange(desc(electricity_per_capita)) %>% slice(1)
top_gdp <- latest_data %>% arrange(desc(gdp)) %>% slice(1)
top_renew_share <- latest_data %>% arrange(desc(renewables_share_elec)) %>% slice(1)

# Print the results
cat(glue::glue("Year considered: {latest_year}\n"))
## Year considered: 2022
cat(glue::glue(" Country with most renewable electricity: {top_renewables$country} ({round(top_renewables$renewables_electricity, 2)} TWh)\n"))
## Country with most renewable electricity: China (2670.18 TWh)
cat(glue::glue(" Country with highest electricity generation: {top_generation$country} ({round(top_generation$electricity_generation, 2)} TWh)\n"))
## Country with highest electricity generation: China (8848.73 TWh)
cat(glue::glue(" Country with highest GDP: {top_gdp$country} (${format(round(top_gdp$gdp, 0), big.mark=',')})\n"))
## Country with highest GDP: China ($2.696602e+13)
cat(glue::glue(" Country with highest renewables share: {top_renew_share$country} ({round(top_renew_share$renewables_share_elec, 2)}%)\n"))
## Country with highest renewables share: Albania (100%)

Load CO2 and Clean Data - CO2 Emissions Dataset -1

co2_data <- read_csv("https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv")
## Rows: 50191 Columns: 79
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): country, iso_code
## dbl (77): year, population, gdp, cement_co2, cement_co2_per_capita, co2, co2...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
filter_co2_data <- function(df) {
  df %>%
    filter(!str_starts(iso_code, "OWID"), !is.na(country), !is.na(year)) %>%
    filter(year >= 2000) %>%
    select(where(~ sum(is.na(.)) < 0.4 * nrow(df))) %>%
    filter(!is.na(population), !is.na(gdp))
}

co2_data = filter_co2_data(co2_data)

co2_data = co2_data %>% select(-c(cement_co2,cement_co2_per_capita,co2_growth_abs,co2_growth_prct,co2_including_luc_growth_abs,co2_including_luc_growth_prct,cumulative_cement_co2,cumulative_co2_including_luc,cumulative_luc_co2,flaring_co2,flaring_co2_per_capita,cumulative_flaring_co2,share_global_flaring_co2,share_global_cumulative_flaring_co2))

#COâ‚‚ emissions year-over-year change
co2_data = co2_data %>%
  group_by(country) %>%
  mutate(co2_pct_change = (co2 - lag(co2)) / lag(co2) * 100)
#Total COâ‚‚ from fossil fuel types 
co2_data = co2_data %>%
  mutate(fossil_fuel_co2 = coal_co2 + oil_co2 + gas_co2)
#log gdp
co2_data = co2_data %>% mutate(log_gdp = log(gdp))
#log co2
co2_data = co2_data %>% mutate(log_co2 = log(co2))

Merge Two Datasets to Single one - Using Country and Year as keys

co2_transformed <- co2_data


co2_transformed <- co2_transformed %>%
  select(-iso_code, -population, -gdp, -log_gdp)
# 

energy_co2_merged <- left_join(
  energy_transformed,
  co2_transformed,
  by = c("country", "year")
)


get_co2_electricity_ratio <- function(df) {
  df %>%
    mutate(
      co2_per_kwh = co2 / electricity_generation)
}

energy_co2_merged <- get_co2_electricity_ratio(energy_co2_merged)

#write_csv(energy_co2_merged, "/Users/manasamangipudi/Desktop/Semester-3/DataWrangling/Project/data/energy_co2_data_merged.csv")
energy_co2_merged

Exploratory Data Analysis

CO2 Dataset

CO2 Emissions vs Population - How GDP impacts it

library(ggrepel)
ggplot(energy_co2_merged %>% filter(year==2022), aes(x = population, y = co2, size = gdp, label = country)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_text_repel(max.overlaps = 10, size = 5) +
  scale_x_log10(labels = scales::comma) +
  scale_y_log10(labels = scales::comma) +
  labs(
    title = "Fossil Fuel CO2 Emissions vs Population (Bubble Size = GDP)",
    x = "Population (log scale)",
    y = "CO2 Emissions (log scale)",
    size = "GDP"
  ) +
  theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 140 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

CO2 Emissions of Global Super Powers

ggplot(energy_co2_merged %>% filter(country %in% c("United States","India","China","Germany","Brazil")),aes(x=year,y=co2,color = country))+
  geom_line(size=1)+
  labs(title = "CO2 Emisions Over Time for Global Super Powers ",y="CO2 (Million Tonnes)")+
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Paris Agreement Effect on CO2 Emissions

co2_change <- energy_co2_merged %>%
  filter(year %in% c(2015, 2022)) %>%
  select(country, year, co2,iso_code) %>%
  pivot_wider(names_from = year, values_from = co2, names_prefix = "co2_") %>%
  mutate(co2_diff_2015_2022 = co2_2022 - co2_2015)

co2_change %>%
  drop_na(co2_diff_2015_2022,iso_code) %>%
  arrange(co2_diff_2015_2022) %>%
  slice(1:15) %>%
  ggplot(aes(x = reorder(country, co2_diff_2015_2022,decreasing = TRUE), y = co2_diff_2015_2022)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Countries with Most Decrease in CO2 Emissions (2015–2022)",
    x = "Country",
    y = "Change in CO2 Emissions (Mt)"
  ) +
  theme_minimal()

Electricity Generation Dataset :

Electricity Generation over Time by Continents

ggplot(energy_co2_merged, aes(x = continent, y = electricity_generation)) +
  geom_boxplot(fill = "steelblue") +
  theme_minimal() +
  labs(title = "Electricity by Continent")

ggplot(energy_co2_merged, aes(x = continent, y = electricity_per_capita)) +
  geom_boxplot(fill = "steelblue") +
  theme_minimal() +
  labs(title = "Electricity per Capita by Continent")

energy_co2_merged %>%
  group_by(continent, year) %>%
  summarise(
    coal = sum(coal_share_elec, na.rm = TRUE),
    gas = sum(gas_share_elec, na.rm = TRUE),
    oil = sum(oil_share_elec, na.rm = TRUE)
  ) %>%
  pivot_longer(cols = c("coal", "gas","oil"), names_to = "source", values_to = "electricity") %>%
  ggplot(aes(x = year, y = electricity, color = source)) +
  geom_line(size = 1) +
  facet_wrap(~continent, scales = "free_y") +
  theme_minimal() +
  labs(title = "Growth of Coal, Gas, Oil by Continent", y = "Electricity (TWh)")
## `summarise()` has grouped output by 'continent'. You can override using the
## `.groups` argument.

energy_co2_merged %>%
  group_by(continent, year) %>%
  summarise(
    wind = sum(wind_share, na.rm = TRUE),
    solar = sum(solar_share, na.rm = TRUE),
    hydro = sum(hydro_share, na.rm = TRUE)
  ) %>%
  pivot_longer(cols = c("wind", "solar","hydro"), names_to = "source", values_to = "electricity") %>%
  ggplot(aes(x = year, y = electricity, color = source)) +
  geom_line(size = 1) +
  facet_wrap(~continent, scales = "free_y") +
  theme_minimal() +
  labs(title = "Growth of Wind, Solar, Hydro by Continent", y = "Electricity (TWh)")
## `summarise()` has grouped output by 'continent'. You can override using the
## `.groups` argument.

Comparing Renewable and Fossil-fuel based Electricity Generation

energy_by_continent <- energy_transformed %>%
  group_by(continent, year) %>%
  summarise(
    renewables = sum(renewables_electricity, na.rm = TRUE),
    fossil = sum(fossil_electricity, na.rm = TRUE)
  ) %>%
  pivot_longer(cols = c("renewables", "fossil"), names_to = "source", values_to = "electricity") %>%
  ungroup()
## `summarise()` has grouped output by 'continent'. You can override using the
## `.groups` argument.
ggplot(energy_by_continent, aes(x = year, y = electricity, color = source)) +
  geom_line(size = 1) +
  facet_wrap(~continent, scales = "free_y") +
  theme_minimal() +
  labs(title = "Renewable vs Non-Renewable Electricity Generation by Continent Over Time")

Electricity vs GDP for year 2022 - Size representing Fossil_and_Renewable Ratio

ggplot(energy_co2_merged %>% filter(year==2022), aes(x = log_gdp, y = electricity_generation, size = fossil_to_renewable_ratio, label = country)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_text_repel(max.overlaps = 10, size = 5) +
  scale_x_log10(labels = scales::comma) +
  scale_y_log10(labels = scales::comma) +
  theme_minimal()
## Warning: ggrepel: 155 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Linear Relationship between GDP and Electricity Generation

Merged Dataset - EDA

Fossil Electricity and CO2 Emissions Linear Relationship

energy_co2_merged %>%
  group_by(continent, year) %>%
  summarise(
    fossil_elec = mean(fossil_electricity, na.rm = TRUE),
    co2 = mean(co2, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = fossil_elec, y = co2)) +
  geom_point(alpha = 0.6, color = "firebrick") +
  geom_smooth(method = "lm", color = "black") +
  facet_wrap(~continent, scales = "free") +
  labs(
    title = "Fossil Electricity vs CO2 Emissions by Continent",
    x = "Fossil Electricity (TWh)",
    y = "CO2 Emissions (Mt)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

CO2 Emissions per unit of electricity generated over time

energy_co2_merged %>%
  mutate(co2_per_twh = co2 /electricity_generation) %>%
  group_by(continent, year) %>%
  summarise(
    co2_efficiency = mean(co2_per_twh, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = year, y = co2_efficiency, color = continent)) +
  geom_line(size = 1.2) +
  labs(
    title = "COâ‚‚ Emissions per Unit of Electricity Generated",
    y = "COâ‚‚ / TWh",
    x = "Year"
  ) +
  theme_minimal()

GHG Emissions per unit of electricity generated over time

energy_co2_merged %>%
  mutate(ghg_per_twh = total_ghg /electricity_generation) %>%
  group_by(continent, year) %>%
  summarise(
    ghg_efficiency = mean(ghg_per_twh, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = year, y = ghg_efficiency, color = continent)) +
  geom_line(size = 1.2) +
  labs(
    title = "GHG Emissions per Unit of Electricity Generated",
    y = "GHG / TWh",
    x = "Year"
  ) +
  theme_minimal()

Linear regression of CO2 against every other column sorted by estimate

# 1. Filter for United States
us_data <- energy_co2_merged %>%
  filter(country == "United States") %>%
  select(-country, -iso_code, -continent, year)  # Remove non-numeric/grouping vars

# 2. Remove columns with all NA or zero variance
us_data <- us_data %>%
  select(where(is.numeric)) %>%
  select(where(~ sum(!is.na(.)) > 0)) %>%
  select(where(~ sd(., na.rm = TRUE) > 0))

# 3. Build models and extract p-values
results <- map_dfr(
  setdiff(names(us_data), "co2"),
  function(var) {
    df <- us_data %>% select(co2, !!sym(var)) %>% drop_na()
    if (nrow(df) < 10) return(NULL)  # skip if not enough data
    model <- lm(co2 ~ ., data = df)
    tidy(model) %>%
      filter(term != "(Intercept)") %>%
      mutate(variable = var)
  }
)

# 4. Print significant predictors (p >= 0.05)
significant <- results %>%
  filter(p.value < 0.05) %>%
  arrange(p.value)

head(significant %>% arrange(desc(estimate)))

Fossil Fuel vs CO2 Emissions in 2022 (By Population)

library(ggrepel)
ggplot(energy_co2_merged %>% filter(year==2022), aes(x = renewables_per_capita, y = co2, size = population, label = country)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_text_repel(max.overlaps = 5, size = 4) +
  scale_x_log10(labels = scales::comma) +
  scale_y_log10(labels = scales::comma) +
  labs(
    title = "Fossil Fuel CO2 Emissions vs Population (Bubble Size = Population)",
    x = "Renewables_per_capita",
    y = "CO2 Emissions (log scale)",
    size = "Population"
  ) +
  theme_minimal()
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 134 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Global Scatter Plot: CO2 per Capita vs Renewables Share
energy_co2_merged <- energy_co2_merged %>%
  mutate(co2_per_capita = co2 / population)

ggplot(energy_co2_merged, aes(x = renewables_share_elec, y = log(co2_per_capita))) +
  geom_point(alpha = 0.4, color = "darkgreen") +
  geom_smooth(method = "lm", color = "red") +
  labs(
    title = "Relationship between Renewable Electricity Share and CO2 Emissions per Capita",
    x = "Renewables Share (%)",
    y = "Log CO2 Emissions per Capita (tons)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Global Regression Model
model_global <- lm(co2_per_capita ~ renewables_share_elec + gdp, data = energy_co2_merged)
summary(model_global)
## 
## Call:
## lm(formula = co2_per_capita ~ renewables_share_elec + gdp, data = energy_co2_merged)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -9.362e-06 -3.485e-06 -8.430e-07  1.318e-06  6.067e-05 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.811e-06  1.471e-07  46.314   <2e-16 ***
## renewables_share_elec -6.285e-08  2.958e-09 -21.248   <2e-16 ***
## gdp                    4.601e-19  5.126e-20   8.976   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.028e-06 on 3764 degrees of freedom
##   (28 observations deleted due to missingness)
## Multiple R-squared:  0.1335, Adjusted R-squared:  0.133 
## F-statistic: 289.9 on 2 and 3764 DF,  p-value: < 2.2e-16
top5 <- c("Germany", "India", "Brazil", "United States", "China")

energy_co2_merged %>%
  filter(country %in% top5) %>%
  ggplot(aes(x = year, y = co2 / population, color = country)) +
  geom_line(size = 1) +
  labs(
    title = "CO2 Emissions per Capita Over Time (Top 5 Economies)",
    y = "CO2 per Capita (tons)",
    x = "Year"
  ) +
  theme_minimal()

top5 <- c("United States", "China", "India", "Germany", "Brazil")

energy_co2_merged %>%
  filter(country %in% top5) %>%
  ggplot(aes(x = year, y = renewables_share_elec, color = country)) +
  geom_line(size = 1) +
  labs(
    title = "Renewable Electricity Share Over Time (Top 5 Economies)",
    y = "Renewables Share (%)",
    x = "Year"
  ) +
  theme_minimal()

energy_co2_merged %>%
  group_by(year) %>%
  summarise(avg_co2_per_kwh = mean(co2 / electricity_generation, na.rm = TRUE)) %>%
  ggplot(aes(x = year, y = avg_co2_per_kwh)) +
  geom_line(color = "firebrick", size = 1.2) +
  labs(
    title = "Global Average CO2 Emissions per kWh Over Time",
    x = "Year",
    y = "CO2 per kWh"
  ) +
  theme_minimal()

energy_co2_merged %>%
  filter(year == max(year)) %>%
  mutate(co2_per_kwh = co2 / electricity_generation) %>%
  ggplot(aes(x = renewables_share_elec, y = co2_per_kwh)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Renewables Share vs COâ‚‚ per kWh (Latest Year)",
    x = "Renewables Share (%)",
    y = "COâ‚‚ per kWh"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).